Determining the effects of lake variables on the relationship of TP (log transformed ug/L) with Chl-a concentrations (log transformed ug/L), using the gradient of residuals against variables.

Linear model of logCHLA~logTP

Initial log-transformed linear model

Scatterplot

#linear model of chlorophyll to phosphorus
lm1 <- lm(logCHLA~logTP, data=df)
#creating residuals
df$res.lm <- resid(lm1)
plot.lm1 <- ggplot(df,aes(x=logTP,y=logCHLA))+
  geom_point()+
  geom_smooth(method='lm')+
  labs(title="Linear log-log model",x="log TP",y="log CHLA")
print(plot.lm1)
## `geom_smooth()` using formula 'y ~ x'

Model statistics

summary(lm1)
## 
## Call:
## lm(formula = logCHLA ~ logTP, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6618 -0.4647  0.0533  0.5748  4.5179 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.92330    0.06886   86.01   <2e-16 ***
## logTP        1.03394    0.01596   64.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9418 on 6461 degrees of freedom
## Multiple R-squared:  0.3938, Adjusted R-squared:  0.3937 
## F-statistic:  4198 on 1 and 6461 DF,  p-value: < 2.2e-16
anova(lm1)
## Analysis of Variance Table
## 
## Response: logCHLA
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## logTP        1 3723.5  3723.5  4197.6 < 2.2e-16 ***
## Residuals 6461 5731.3     0.9                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual plots

Remove outliers and rerun model

Identify outliers

Outliers were identified as points with studentized residuals outside 3:-3

Scatterplot without outliers

plot.lm2<- ggplot(df2,aes(x=logTP,y=logCHLA))+
  geom_point()+
  geom_smooth(method='lm')+
  labs(title="Linear log-log model without outliers",x="log TP",y="log CHLA")
print(plot.lm2)
## `geom_smooth()` using formula 'y ~ x'

Model statistics

summary(lm2)
## 
## Call:
## lm(formula = logCHLA ~ logTP, data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.98721 -0.46928  0.03469  0.53016  2.81926 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.30979    0.06214  101.55   <2e-16 ***
## logTP        1.11476    0.01440   77.41   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8247 on 6356 degrees of freedom
## Multiple R-squared:  0.4853, Adjusted R-squared:  0.4852 
## F-statistic:  5993 on 1 and 6356 DF,  p-value: < 2.2e-16
anova(lm2)
## Analysis of Variance Table
## 
## Response: logCHLA
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## logTP        1 4075.5  4075.5  5992.6 < 2.2e-16 ***
## Residuals 6356 4322.6     0.7                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual plots

Variable effects on model residuals

Relationship of model residuals to lake characteristic variables

Residual plots

Pattern of logCHLA~logTP model residuals along gradients of variables. All variables are fitted to a linear model on the residuals.

# only keep relevant variables
df3 <- df2 %>%
  mutate("Log(N:P)"=log(NPratio)) %>% 
  select(SAMPLE_DATE,
         `DEPTH, SECCHI DISK DEPTH_SD_NA`,
         `Log(N:P)`,
         `TEMPERATURE, WATER_OW_NA`,
         `TRUE COLOR_OW_T`,
         res.lm) %>% 
  rename("Sample date"=SAMPLE_DATE,
         "Secchi disk depth"=`DEPTH, SECCHI DISK DEPTH_SD_NA`,
         "Lake temperature"=`TEMPERATURE, WATER_OW_NA`,
         "True color"=`TRUE COLOR_OW_T`)

#loop
for(i in (2:ncol(df3)-1)){
  lm.df <- df3[is.finite(df3[,i]),] #removing NAs from relevant column
  lm.loop <- lm(res.lm ~ lm.df[,i], data=lm.df) #linear model of residuals against variable
  p1 <- ggplot(df3,aes(x=df3[,i],y=res.lm))+ #scatterplot
    geom_point()+
    geom_smooth(method='lm')+ 
    labs(x=paste(colnames(df3[i])),
         y='residuals of LM(logCHLA~logTP)')
  if(colnames(df3[i])=="Sample date"){
    p1 <- p1+scale_x_date(date_labels="%Y")
  }
  p2 <- ggplot(data=df3, aes(x=df3[,i])) + #density plot
    geom_density(fill="grey") +
    theme(axis.title.x=element_blank(),
          axis.text.x=element_blank(),
          axis.ticks.x=element_blank(),
          axis.title.y=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          panel.grid=element_blank(),
          axis.line=element_blank(),
          panel.background = element_blank())
  require(gridExtra)
  grid.arrange(p2, p1, ncol = 1, heights = c(1, 4)) #output plots
  stats[nrow(stats)+1,] <- c(colnames(df3[i]), #coefficients into stats table
                             coef(lm.loop),
                             summary(lm.loop)$coefficients[2,4],
                             summary(lm.loop)$r.squared)
  vars[nrow(vars)+1,] <- c(colnames(df3)[i], #coefficients into variables table
                           paste(min(as.numeric(na.omit(df3[,i]))),
                                 "-",max(as.numeric(na.omit(df3[,i])))),
                           nrow(df3[i]),
                           paste(median(as.numeric(na.omit(df3[,i]))),
                                 "(",summary(as.numeric(na.omit(df3[,i])))[2],
                                 "-",summary(as.numeric(na.omit(df3[,i])))[5],")")) #stats
}

## Error in FUN(X[[i]], ...): object 'True color' not found
## Error in eval(predvars, data, env): object 'True color' not found
## Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...): 0 (non-NA) cases
## Error in coef(alm): object 'alm' not found
## Error in coef(blm): object 'blm' not found

Model statistics

Variable descriptive statistics

characteristicrangenmedian..IQR.
Sample date6007 - 18535635812684.5 ( 8230 - 15878.5 )
Secchi disk depth0.05 - 13.663583.3 ( 2 - 4.75 )
Log(N:P)-Inf - 5.6466240608162563580.0143887374520995 ( -0.671485683778766 - 1.0929824056555 )
Lake temperature4 - 36635822.7777777777778 ( 20 - 25 )
True color0 - 222635810 ( 6 - 19 )
ACID DEPOSITION213.6579857 - 1671.7035522985808.8840129 ( 465.2497559 - 975.3500061 )
PRECIPITATION76.6439971923828 - 192.6909942626952515105.960250854492 ( 96.1419982910156 - 119.623184926382 )
COLOR (LOW)Inf - -Inf0NA ( NA - NA )
COLOR (HIGH)Inf - -Inf0NA ( NA - NA )

Coefficients for linear models of residuals along gradient of variables. Bold values indicate significant p-values (<0.05). Sorted by rsquared.

characteristicinterceptslopep.valuersquared
Secchi disk depth0.542224704653416-0.153454418707041.53166787498659e-1730.120098166534876
ACID DEPOSITION-0.6313386915471560.0008307305289238771.64554161309906e-680.0975027001854195
Sample date0.779767999996377-6.33576953898594e-051.33224366106376e-1350.0921248014852935
Lake temperature0.355842599068243-0.01551027867239082.54317977377244e-070.00438443901887856
PRECIPITATION0.242481381755185-0.001101759836422970.2088067305897230.000628530903058732
Log(N:P)-0.0138260955201857-0.01335700406499170.096006276896210.000547682855133057
True color0.00550983361886227-0.0002920781818731630.6764553126934962.78514288861991e-05

Most recent year

## Error in `$<-.data.frame`(`*tmp*`, "DEPTH, BOTTOM_OW_NA", value = NA_real_): replacement has 1 row, data has 0
## Error in summary(lm.dep)$coefficients[2, 4]: subscript out of bounds

Recent year statistics

Variable descriptive statistics

characteristicyearsrangenmedian..IQR.
DEPTH, SECCHI DISK DEPTH_SD_NA2018 - 20200.65 - 11.852753.9 ( 2.25 - 5.25 )
NITROGEN, NITRATE-NITRITE_OW_T2018 - 20200.0035 - 0.6772750.0202 ( 0.01 - 0.0706 )
NPratio2018 - 20200.0737463126843658 - 115.5290102389082751.42387732749179 ( 0.694444444444445 - 4.58333333333333 )
TEMPERATURE, WATER_OW_NA2018 - 202015 - 30.527525 ( 22 - 26.75 )
TRUE COLOR_OW_T2018 - 20201 - 852757 ( 6 - 13.25 )
LAKE DEPTH2020 - 202011 - 119
ACID DEPOSITION (DIN:TP>1.5)2011 - 2018213.6579857 - 772.118072582
ACID DEPOISITION (DIN:TP<1.5)2017 - 2018213.6579857 - 712.899106176
PRECIPITATION2013 - 201676.9628023420061 - 131.316277313232126
NITRATE+NITRITE (DIN:TP<1.5)2017 - 20200.0035 - 0.118140
NITRATE+NITRITE (DIN:TP>1.5)2011 - 20200.01 - 0.67772

Coefficients for linear models of residuals along gradient of variables in most recent year. Bold values indicate significant p-values (<0.05). Sorted by rsquared.

characteristicinterceptslopep.valuersquared
DEPTH, SECCHI DISK DEPTH_SD_NA0.425775593905445-0.135591161179931.7863373757799e-100.141586467844989
NITROGEN, NITRATE-NITRITE_OW_T-0.0020484366413141-1.285537647858290.02766852930777670.0545088648472918
NITRATE+NITRITE (DIN:TP>1.5)-0.179167545093814-0.9365190287032930.1211023167245840.0349721672362281
TRUE COLOR_OW_T-0.1954146036620090.007510884443618830.03338539632262580.0166534142441452
ACID DEPOSITION (DIN:TP>1.5)-0.8654070630508430.00123854590421910.2792259502067010.0153878974582628
ACID DEPOSITION (DIN:TP<1.5)-0.8654070630508430.00123854590421910.2792259502067010.0153878974582628
PRECIPITATION0.657490394537693-0.01053368284122980.1765762638842380.0151656732974757
NPratio-0.0717610669637638-0.005813932877366630.2610776979593290.0144951145540703
NITRATE+NITRITE (DIN:TP<1.5)-0.07974176315969190.3454659092126850.9250061695625066.53877518406656e-05
TEMPERATURE, WATER_OW_NA-0.0776540042463184-0.001112322027553570.9387720557597222.18931765096922e-05

Color bins

High vs low color, sample date [1] “HIGH COLOR, N= 1444” [1] “LOW COLOR, N= 5017”

characteristicinterceptslopep.valuersquared
High color0.977892682796962-7.59397826560209e-052.44617881999168e-220.0681692927833179
Low color0.76644641965883-6.26936827840964e-052.58755039850473e-1190.104002177677548

Color trend, sample date [1] “INCREASING COLOR, N= 4250” [1] “DECREASING COLOR, N= 2108”

characteristicinterceptslopep.valuersquared
Increasing color0.861328156439938-7.24596124689312e-052.23795234284139e-1150.115492445831473
Decreasing color0.658100806089956-4.8852529203772e-058.67420733371484e-300.0592106598451909

Relationship of model residuals to year by lake

Pattern of logCHLA~logTP model residuals along sample date for each lake. Data from each lake are fitted to a linear model on the residuals.

Residual plots

A red regression line indicates the model is statistically insignificant (p>0.05). A red border medians that the lake had a negative chlorophyll trend and a flat or positive phosphorous trend.

#each lake
#loop over each lake
LAKE_IDS <- unique(df$LAKE_ID) #list lakes
coeffs <- data.frame(lake=character(),
                     intercept=double(),
                     slope=double(),
                     'p-value'=double(),
                     rsquared=double()) #blank coefficient table

incongruous <- read.csv("~/OneDrive - New York State Office of Information Technology Services/Rscripts/Trend/chl-tp-lakes.csv")
incongruous <- incongruous[which(incongruous$CHL_slope!=incongruous$PHOS_slope),]
incongruous <- incongruous[incongruous$CHL_slope=="neg",]

for (i in 1:length(LAKE_IDS)) {
  lake <- LAKE_IDS[[i]] #pick a lake
  dat <- df2[df2$LAKE_ID == lake,] #create new data with that lake
  resid.loop <- lm(logCHLA~logTP,data=dat) #linear model of chl vs tp
  dat$res.loop <- resid(resid.loop) #residuals for that lake
  lm.loop <- lm(res.loop~SAMPLE_DATE,data=dat) #linear model of residuals vs year
  co <- coef(lm.loop)
  plot <- ggplot(dat,aes(x=SAMPLE_DATE,y=res.loop))+ #plot lake
    geom_point() +
    labs(title=paste(lake),x='year',y='residuals of LM(logCHLA~logTP)')
  if(summary(lm.loop)$coefficients[,4]<0.05){ #plot significant lakes
    plot <- plot + geom_smooth(method='lm')
  }else{
    plot <- plot + geom_smooth(method='lm',aes(color="red"),show.legend=FALSE)
  }
  if(lake %in% incongruous$LAKE_ID){
    plot <- plot + theme(panel.border = element_rect(colour = "red", fill=NA, size=3))
  }else{}
  print(plot) #print
  coeffs[nrow(coeffs)+1,] <- c(lake,
                               co,
                               summary(lm.loop)$coefficients[2,4],
                               summary(lm.loop)$r.squared)
}

Model statistics

Coefficients for linear models of residuals along gradient of time for each lake. Bold values indicate significant p-values (<0.05). Asterisks (***) indicate that the lake had a negative chlorophyll trend and a flat or positive phosphorous trend.

lakeinterceptslopep.valuersquared
0202CHA01220.935502919564683-7.46180295662354e-051.50255775810091e-110.170538443604458
0202FIN01530.635749577648232-5.95078577676436e-050.0001745702049316340.0712769366388961
0302SOD00960.48666769255914-3.82719485604449e-050.01064145391406540.054010468237099
0403SIL01150.502854823465325-4.10077102364866e-050.001832053827526750.0576617991573737
0602BRA01541.06149274566162-8.76341561974301e-051.90262833445895e-110.255844767247596
0602CRO0073-0.02796840063245762.34944278403732e-060.8461006842089540.0002519619542885
0602EAR01460.851959664465121-6.94640784852235e-054.50387751003881e-060.124292588234535
0602EAT01631.3623772971199-0.0001121181180475241.63564369192852e-150.344014792791017
0602HAT01550.593254365900425-4.89064089889259e-052.94395645687317e-050.106817806226883
0602LEB01531.11902446562697-9.78474627625744e-051.5274614012525e-110.271692914186169
0602MEL00390.637271134764219-5.08639374049918e-056.48105278768977e-060.0866190932257852
0602MOR01520.891944581461986-7.51301268569576e-052.05080228334023e-080.113300646478107
0602PET00780.331789456726404-2.53168933367702e-050.03690766211523340.0234515777766319
0703CAZ01530.449504679704982-3.63390675897151e-050.002816178982327050.0540762194859375
0703DER0139A1.51722873171037-0.0001246064496920622.93793620926997e-260.367298184087431
0703TUS0153A0.949073117601566-8.16720216902249e-052.25647663987516e-100.148365549026748
0705COM03330.673891148746326-5.21493736896624e-050.0003186333594933140.0644480841680051
0705DUC02221.14655293221319-9.03839962163187e-058.00695783808632e-070.170245070080149
0801SEC0782B0.129851054598145-1.10468600256644e-050.2464082075649060.00725564585045498
0906BLA00011.10590132446955-9.06208131345451e-056.25783448358662e-080.127600869619626
0906BON00240.114791426411992-8.8938623292828e-060.245389759332290.00792918186512283
0906BUT00540.796388583921064-6.64826965822309e-051.57274435460665e-060.0944090894268698
1005GLE04410.367866424215463-2.90789951287784e-050.01361802827230250.0361108061211258
1102BAB11090.730036058917398-5.88644234673036e-050.0001889775104942190.0604552801273827
1104GOO0672A0.607548239418277-5.24133190855349e-050.001324578267038140.0722238220775747
1104SAC03140.591673654299803-5.09535723627264e-052.465641569285e-050.128355921621865
1104SCH03741.52501684883298-0.0001162133810052245.64158238577784e-100.202904429881148
1302CAR0062A0.435295439195985-3.97203176054331e-050.004321720767709890.0962057148638055
1302WAC01170.0706969591230919-5.66313319549469e-060.6023488490061190.00151966569190007
1308ROB09020.72838703360406-5.34849245969701e-050.004326189052573810.0648110381252874
1310QUE00570.50823785735609-3.99890117026576e-050.004478729614097250.0398814593146023
1401ANA02510.467390753646235-3.66754111085953e-050.003875740123622650.0386700308115073
1402WOL00371.12013529268377-9.92151035598942e-051.83213731872522e-080.231067618390379
1404OQU03831.091460025501-8.28530634906584e-057.14167052707765e-080.150071736337309
1501LUC0982B-0.2171361363638681.76125478750812e-050.3800005533774780.00714356847465044
1701LFR06621.42885257134606-0.0001053740542641063.28397374780162e-060.119867716701037

Distribution of slopes

Dot plot of slope for each lake. Grey dots are statistically insignificant (p>0.05).

Maps